{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "dcfa29dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "import getpass\n",
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "from neo4j import GraphDatabase"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c2a218fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Neo4j connection settings.\n",
    "#\n",
    "# Values are read from environment variables so that credentials are not\n",
    "# stored in the notebook. NEO4J_URI and NEO4J_USER fall back to the host\n",
    "# and user that used to be hard-coded here. The password has no default:\n",
    "# when NEO4J_PASSWORD is unset or empty it is prompted for interactively.\n",
    "neo4j_host = os.environ.get(\"NEO4J_URI\", \"bolt://52.206.105.229:7687\")\n",
    "neo4j_user = os.environ.get(\"NEO4J_USER\", \"neo4j\")\n",
    "neo4j_password = os.environ.get(\"NEO4J_PASSWORD\") or getpass.getpass(\"Neo4j password: \")\n",
    "\n",
    "# Shared driver used by every later cell that opens a session.\n",
    "driver = GraphDatabase.driver(neo4j_host, auth=(neo4j_user, neo4j_password))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "9cd97083",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Full-text index on Movie.title\n",
    "#\n",
    "# Sends one Cypher statement that creates a full-text index named `movie`\n",
    "# over the `title` property of `Movie` nodes. The statement carries\n",
    "# `IF NOT EXISTS`, so it does nothing when the index is already there.\n",
    "\n",
    "create_full_text_index = \"\"\"\n",
    "CREATE FULLTEXT INDEX movie IF NOT EXISTS\n",
    "FOR (n:Movie)\n",
    "ON EACH [n.title]\n",
    "\"\"\"\n",
    "\n",
    "# The session is closed automatically when the `with` block exits.\n",
    "with driver.session() as index_session:\n",
    "    index_session.run(create_full_text_index)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4ce71133",
   "metadata": {},
   "source": [
    "Download the movie embeddings file from the link below and save it as `movie_embeddings.csv` in the same directory as this notebook; the next cell reads it from there.\n",
    "\n",
    "Link: https://drive.google.com/file/d/1ELuq3k1LtZZ_qt7nxe-H6PyHlyl_p3wG/view?usp=sharing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e65c80a4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Importing 0 batch of embeddings\n",
      "Importing 1000 batch of embeddings\n",
      "Importing 2000 batch of embeddings\n",
      "Importing 3000 batch of embeddings\n",
      "Importing 4000 batch of embeddings\n",
      "Importing 5000 batch of embeddings\n",
      "Importing 6000 batch of embeddings\n",
      "Importing 7000 batch of embeddings\n",
      "Importing 8000 batch of embeddings\n",
      "Importing 9000 batch of embeddings\n"
     ]
    }
   ],
   "source": [
    "# Import embeddings\n",
    "#\n",
    "# Reads `movie_embeddings.csv` and, `step` rows at a time, sets the\n",
    "# `embedding` property on the `Movie` node whose `movieId` equals the row's\n",
    "# first column (converted to a string). The third column is handed to\n",
    "# `apoc.convert.fromJsonList`, so it is presumably a JSON-encoded list\n",
    "# stored as text -- confirm against the CSV.\n",
    "\n",
    "embedding_df = pd.read_csv(\"movie_embeddings.csv\")\n",
    "params = embedding_df.values\n",
    "step = 1000\n",
    "properties_set = 0  # running total of properties written by the server\n",
    "\n",
    "with driver.session() as session:\n",
    "    for i in range(0, len(embedding_df), step):\n",
    "        # `i` is the offset of the first row in the batch.\n",
    "        print(f\"Importing {i} batch of embeddings\")\n",
    "        batch = [{'id': str(x[0]), 'embedding': x[2]}\n",
    "                 for x in params[i:i+step]]\n",
    "        result = session.run(\"\"\"\n",
    "        UNWIND $data AS row\n",
    "        MATCH (m:Movie {movieId:row.id})\n",
    "        SET m.embedding = apoc.convert.fromJsonList(row.embedding);\n",
    "        \"\"\", {'data': batch})\n",
    "        # Consume the result before sending the next batch so that a\n",
    "        # server-side error is raised for the batch that caused it, and so\n",
    "        # the write counters can be read.\n",
    "        properties_set += result.consume().counters.properties_set\n",
    "\n",
    "# MATCH yields nothing for ids that have no Movie node, so such rows are\n",
    "# skipped without an error; report when that appears to have happened.\n",
    "if properties_set < len(embedding_df):\n",
    "    print(f\"Warning: {properties_set} properties set for {len(embedding_df)} rows; \"\n",
    "          \"check that the ids in the CSV match Movie.movieId\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d2ff441",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "scrape",
   "language": "python",
   "name": "scrape"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
